# ========================================================================= #
# Project: Lexical Ambiguity in Political Rhetoric (BJPolS)
# - Script: Compute sentence embeddings
# - Author: Patrick Kraft
# ========================================================================= #

import numpy as np
import gensim.downloader as api
from fse import IndexedLineDocument
from fse.models import uSIF

## load pre-trained word embeddings
# Identifier handed to gensim's downloader API; kept in one place so the
# choice of word vectors is easy to spot.
EMBEDDING_NAME = "glove-wiki-gigaword-100"
glove = api.load(EMBEDDING_NAME)

## load sentences
# NOTE(review): presumably one sentence per line in the input file -- confirm
# against the script that writes ../out/sentences.txt.
doc = IndexedLineDocument("../out/sentences.txt")

# Sanity check: echo the first four items (element 1, a tab, then element 0)
# and stop reading as soon as the fourth one has been printed.
for n_shown, item in enumerate(doc, start=1):
    print(f"{item[1]}\t{item[0]}")
    if n_shown == 4:
        break

## compute sentence embeddings
# uSIF is built on top of the loaded word vectors and then fitted on the
# sentence document.
# NOTE(review): lang_freq="en" presumably makes fse supply English word
# frequencies for the pre-trained vectors -- confirm in the fse docs.
usif_options = {"workers": 2, "lang_freq": "en"}
model = uSIF(glove, **usif_options)
model.train(doc)

## export embeddings
# Write the trained model's sentence-vector matrix as comma-separated text.
sentence_vectors = model.sv.vectors
np.savetxt(
    fname="../out/embeddings.csv",
    X=sentence_vectors,
    delimiter=",",
)
